In [1]:
import vm_tools as vmt
import cortex as cx
import matplotlib.pyplot as plt
import numpy as np
import nibabel as nib
import h5py  # used below to save/load the regression output
import copy
import time
import os
In [2]:
%matplotlib inline
In [10]:
def load_data(filename):
    """Return fMRI data for the given filename and print the shape of the data array.

    The first 4 volumes are dropped before returning.

    Parameters
    ----------
    filename : string
        Path to the given file. The file must be a .nii file.

    Returns
    -------
    data : numpy array
        Array containing the data.
    """
    img = nib.load(filename)
    data = img.get_data()  # use img.get_fdata() on newer nibabel versions
    data = data[:, :, :, 4:]  # drop the first 4 volumes
    print(data.shape)
    return data

def combine_run_arrays(run_array_lst):
    """Return a combined 4D array by concatenating along time (axis=3).

    Parameters
    ----------
    run_array_lst : list of 4D numpy arrays

    Returns
    -------
    4D numpy array
    """
    return np.concatenate(run_array_lst, axis=3)
In [8]:
# All file strings corresponding to BOLD data for subject 4
files = ['task001_run001.bold_dico.nii', 'task001_run002.bold_dico.nii',
         'task001_run003.bold_dico.nii', 'task001_run004.bold_dico.nii',
         'task001_run005.bold_dico.nii', 'task001_run006.bold_dico.nii',
         'task001_run007.bold_dico.nii', 'task001_run008.bold_dico.nii']
all_data = []
for index, filename in enumerate(files):
    new_data = load_data('../../data/' + filename)  # load_data drops the first 4 volumes for us
    num_vols = new_data.shape[-1]
    if index != 0 and index != 7:
        new_num_vols = num_vols - 4
        new_data = new_data[:, :, :, :new_num_vols]  # drop the last 4 volumes for the middle runs
    print(new_data.shape[-1])
    all_data.append(new_data)
combined_runs = combine_run_arrays(all_data)
combined_runs = combined_runs[:, :, :, 9:]  # the first ~17 seconds are credits/have no scene id, so drop them
In [8]:
# Get stimulus feature spaces
stim_fs_fpath = '/home/yuanwang/projects/OpenfMRI/data'  # note: the feature-space filename still needs to be appended to this path
stim_fs_file = np.load(stim_fs_fpath)
# The two assignments below were left blank in the original; the key names used here are assumptions
stim_fs_est = stim_fs_file['est']  # (time x features) estimation feature space -- key name assumed
stim_fs_val = stim_fs_file['val']  # (time x features) validation feature space -- key name assumed
# stim_fs_est_s1 = stim_fs_est_allsess[0:len(stim_fs_est_allsess)/3,:]
# stim_fs_val_s1 = stim_fs_val_allsess[0:len(stim_fs_val_allsess)/3,:]
# stim_fs_est = np.zeros((np.shape(stim_fs_est_s1)[0]/2,np.shape(stim_fs_est_s1)[1]))
# stim_fs_val = np.zeros((np.shape(stim_fs_val_s1)[0]/2,np.shape(stim_fs_val_s1)[1]))
# for t in range(np.shape(stim_fs_est)[0]):
# stim_fs_est[t,:] = np.true_divide(stim_fs_est_s1[2*t,:] + stim_fs_est_s1[2*t+1,:],2)
# for t in range(np.shape(stim_fs_val)[0]):
# stim_fs_val[t,:] = np.true_divide(stim_fs_val_s1[2*t,:] + stim_fs_val_s1[2*t+1,:],2)
In [9]:
# Estimation feature space has 1200 time points, 6555 features
print('Estimation feature space shape: %s'%repr(stim_fs_est.shape))
# Validation has 90 time points, 6555 features
print('Validation feature space shape: %s'%repr(stim_fs_val.shape))
In [10]:
# Get data
# with h5py.File('/data/movie_data/SubML_movies_session1_est.hf5','r') as hf:
# data_est = hf['data'].value
# with h5py.File('/data/movie_data/SubML_movies_session1_val.hf5','r') as hf:
# data_val = hf['data'].value
img = nib.load('ds114_sub004_t2r1.nii')
# The line below was left incomplete in the original; this completion is an assumption.
# The array may still need to be reordered to (time, x, y, z) and split into
# estimation/validation sets (data_est / data_val) before the masking steps below.
data_est = img.get_fdata()
In [11]:
data_est.shape
Out[11]:
In [12]:
# Get cortical mask for this subject
# the mask is used below to select cortical voxels (flattening each 3D volume to a 1D voxel array)
mask = cx.db.get_mask('MLfs2','MLfs2_nb',type='cortical')
In [13]:
# Mask data
data_est_masked = data_est[:,mask]
data_val_masked = data_val[:,mask]
# Show size of masked data
print('Size of masked estimation data is %s'%repr(data_est_masked.shape))
print('Size of masked validation data is %s'%repr(data_val_masked.shape))
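For reference, the boolean cortical mask is what turns each 3D volume into a flat vector of cortical voxels. A minimal sketch of that indexing step on synthetic arrays (shapes and names below are illustrative, not from this dataset):

toy_data = np.random.randn(5, 4, 4, 4)        # (time, x, y, z) synthetic "fMRI" data
toy_mask = np.zeros((4, 4, 4), dtype=bool)
toy_mask.ravel()[:10] = True                  # pretend 10 voxels are cortical

toy_masked = toy_data[:, toy_mask]            # boolean indexing -> (time, n_voxels)
print(toy_masked.shape)                       # (5, 10)

# The inverse operation (used further below) puts masked values back into a 3D volume:
toy_vol = np.zeros(toy_mask.shape)
toy_vol[toy_mask] = toy_masked[0]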
In [15]:
# Create lagged stimulus matrix
efs = vmt.utils.add_lags(stim_fs_est,[2,3,4])
vfs = vmt.utils.add_lags(stim_fs_val,[2,3,4])
# Add column of ones
efs = vmt.utils.add_constant(efs,is_first=True)
vfs = vmt.utils.add_constant(vfs,is_first=True)
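For context, add_lags presumably builds time-shifted copies of the feature matrix so the regression can model the hemodynamic delay (here lags of 2-4 TRs), and add_constant prepends an intercept column. A rough NumPy sketch of that idea, not vm_tools' actual implementation:

def add_lags_sketch(X, lags):
    # Stack copies of X shifted forward in time by each lag (zero-padded at the start),
    # so the feature at time t-lag can predict the response at time t.
    n_t = X.shape[0]
    lagged = []
    for lag in lags:
        Xl = np.zeros_like(X)
        Xl[lag:] = X[:n_t - lag]
        lagged.append(Xl)
    return np.hstack(lagged)

def add_constant_sketch(X, is_first=True):
    # Prepend (or append) a column of ones as an intercept term.
    ones = np.ones((X.shape[0], 1))
    return np.hstack([ones, X] if is_first else [X, ones])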
In [16]:
alpha = np.logspace(0,6,10)
alpha
Out[16]:
In [17]:
# Run regression
n_splits = 10 # number of subdivisions of validation data for cross validation of ridge parameter (alpha)
n_resamps = 10 # number of times to compute regression & prediction within training data (can be <= n_splits)
chunk_sz = 10000 # number of voxels to fit at once. Memory-saving.
pthr = 0.005 # Ridge parameter is chosen based on how many voxels are predicted above a correlation threshold
# for each alpha value (technically it's slightly more complicated than that, see the code).
# This p value sets that correlation threshold.
t0 = time.time()
out = vmt.regression.ridge_cv(efs,data_est_masked,val_fs=vfs,val_data=data_val_masked,alphas=alpha,n_resamps=n_resamps,
n_splits=n_splits,chunk_sz=chunk_sz,pthr=pthr,is_verbose=True)
t1 = time.time()
print("Elapsed time is: %d min, %d sec"%((t1-t0)/60,(t1-t0)%60))
In [18]:
# Plot number of voxels with significant prediction accuracy within the
# estimation data for each alpha value
na = len(out['n_sig_vox_byalpha'])
plt.plot(range(na),out['n_sig_vox_byalpha'],'ko-',lw=2)
# plt.xticks(range(na),vmt.regression.DEFAULT_ALPHAS,rotation=45)
plt.xticks(range(na),alpha,rotation=45)
plt.xlabel('Regularization parameter')
_ = plt.ylabel('Number of voxels\naccurately predicted')
In [20]:
out['cc'].shape
Out[20]:
In [21]:
plt.hist(np.nan_to_num(out['cc']),21)
Out[21]:
In [22]:
mask.sum()
Out[22]:
In [23]:
vmt.plot_utils.slice_3d_matrix(mask,0);
In [24]:
# Put the masked prediction accuracies back into a full 3D volume array (used to build a pycortex Volume below)
br = np.zeros(mask.shape)
br[mask] = out['cc']
In [34]:
# Save the regression output to HDF5
aff_output_fpath = '/home/yuanwang/projects/affordance/output/af_out_ml_all_w2v3.hdf5'
with h5py.File(aff_output_fpath, 'w') as f:
    brDset = f.create_dataset('br', br.shape, br.dtype)
    brDset[:] = br
    weightsDset = f.create_dataset('weights', out['weights'].shape, out['weights'].dtype)
    weightsDset[:] = out['weights']
In [4]:
# Reload the saved regression output (e.g., if the session crashed)
with h5py.File('/home/yuanwang/projects/affordance/output/af_out_ml_all.hdf5', 'r') as hf:
    br = hf['br'][()]            # `.value` in older h5py versions
    weights = hf['weights'][()].T
In [27]:
br.shape
Out[27]:
In [28]:
br.min()
Out[28]:
In [29]:
np.nanmin(br)
Out[29]:
In [30]:
np.nanmax(br)
Out[30]:
In [5]:
V = cx.Volume(br,'MLfs2','MLfs2_nb',cmap='hot',vmin=0,vmax=0.6)
# cx.webgl.show(V)
In [7]:
cx.quickflat.make_figure(V,with_curvature=True)
Out[7]:
In [8]:
cx.quickflat.make_png('/home/yuanwang/projects/affordance/figure/aff_all_ML.png',V,dpi=300)
In [52]:
# # import the saved output
# with h5py.File('/home/yuanwang/projects/affordance/output/af_out_ml.hdf5','r') as hf:
# aff_br = hf['br'].value
# aff_weights = hf['weights'].value.T
In [9]:
roimasks,roinames = cx.get_roi_masks('MLfs2','MLfs2_nb',Dst='cortical')
In [19]:
roinames
Out[19]:
In [20]:
roi_list = ['EBA','FFA', 'OFA', 'V1','V2','V3','V4','LO']
In [25]:
aff_weights.shape
Out[25]:
In [94]:
mkd = np.zeros(roimasks.shape)
# for roi in roi_list:
m_eba = roimasks == roinames[roi_list[0]]   # boolean mask for EBA voxels
mkd[m_eba] = out['cc']   # note: out['cc'] covers all cortical voxels, so this assignment needs the EBA subset of cc
In [87]:
m_eba.shape
Out[87]:
In [89]:
out['cc'].shape
Out[89]:
In [90]:
mask.shape
Out[90]:
In [92]:
mkd.shape
Out[92]:
In [93]:
mkd = np.zeros(roimasks.shape)
In [38]:
np.shape(aff_weights)
Out[38]:
In [36]:
mask
Out[36]:
In [22]:
rois = vmt.ROISet.from_pycortex('AH','NaturalMovies','AHfs','AHfs_nb')
rois
Out[22]:
In [63]:
rois['V1'].shape
Out[63]:
In [86]:
mg = np.zeros(v1_mask.shape)        # v1_mask is assumed to be a boolean V1 mask defined elsewhere
mg[v1_mask] = n2n(rois['V1'].cc)    # n2n (np.nan_to_num) is defined in a cell further below
In [67]:
rois = vmt.ROISet.from_pycortex('AH','NaturalMovies','AHfs','AHfs_nb')
In [68]:
# Apply roi mask to estimated correlation coefficients and weights
rois.apply_mask(pre_mask=mask,cc=out['cc'],weights=out['weights'])
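apply_mask presumably attaches the voxelwise regression results to each ROI by indexing them with that ROI's voxel indices within the cortical mask. A rough sketch of that bookkeeping (illustrative only; not the actual vm_tools ROISet code, and it assumes weights are stored as features x voxels):

def apply_roi_mask_sketch(roi_voxel_idx, cc, weights):
    # roi_voxel_idx: indices of one ROI's voxels within the flattened cortical mask
    # cc:            (voxels,) prediction accuracies over all cortical voxels
    # weights:       (features x voxels) regression weights over all cortical voxels
    return cc[roi_voxel_idx], weights[:, roi_voxel_idx]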
In [69]:
# Show prediction accuracy for V1 voxels and for FFA voxels
n2n = np.nan_to_num
rois_to_plot = ['V1','FFA','S1H','M1H']
fig = plt.figure(figsize=(15,15))
for iroi,roi in enumerate(rois_to_plot):
    plt.subplot(len(rois_to_plot),1,iroi+1)
    plt.hist(n2n(rois[roi].cc),30)
    plt.xlabel('Prediction accuracy ($r$)')
    plt.ylabel('Count')
    _ = plt.title(roi)
In [23]:
gab,pp = vmt.viz.load_gabor(stim_fs_val_file)  # stim_fs_val_file is assumed to point to the validation Gabor feature file; it is not defined above
In [24]:
h = plt.figure(figsize=(6,6))
ax = plt.subplot(111)
# Prep weights for display (these lines were garbled in the original; the reconstruction below is a best guess)
vox = np.argmax(np.nan_to_num(out['cc']))   # assumed: show the best-predicted voxel
to_plot = vmt.utils.avg_wts(out['weights'].T[vox], Mode='wholematrixrespectzero')/2.0 + 0.5
vmt.viz.show_gabor(to_plot, pp, bgcol=(0.5,0.5,0.5), ax=ax)
plt.title('$r$ = %0.2f'%out['cc'][vox])
In [49]:
DEFAULT_ALPHAS=np.array([0]+[2**x for x in range(10,21)])
In [50]:
DEFAULT_ALPHAS
Out[50]:
In [ ]: